In [49]:
from sklearn import svm
In [52]:
import pandas as pd
import requests
import io
def _load_features(url):
    """Download a whitespace-delimited feature file and parse it.

    Parameters
    ----------
    url : str
        Location of a text file holding three whitespace-separated columns.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``digit``, ``intensity`` and ``symmetry``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # A timeout keeps a dead server from hanging the notebook indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    response.raise_for_status()
    s = response.content.decode('utf-8')
    # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
    return pd.read_csv(io.StringIO(s), sep=r'\s+', header=None,
                       names=['digit', 'intensity', 'symmetry'])


df_train = _load_features('http://www.amlbook.com/data/zip/features.train')
df_test = _load_features('http://www.amlbook.com/data/zip/features.test')
In [53]:
# Display the first rows of the training frame as a quick check of the parse.
df_train.head()
Out[53]:
In [54]:
# Display the first rows of the test frame as a quick check of the parse.
df_test.head()
Out[54]:
In [ ]:
# Two-column feature matrices (intensity, symmetry) for the training and
# test frames; the digit label column is left out.
feature_cols = ['intensity', 'symmetry']
X = df_train.loc[:, feature_cols]
Xt = df_test.loc[:, feature_cols]
In [79]:
# Q is passed to SVC as the polynomial kernel degree and C as its penalty
# parameter in the one-vs-all cells.
Q, C = 2, 0.01
In [97]:
# One-vs-all for the even digits: label the chosen digit +1 and every other
# row -1, fit a degree-Q polynomial SVM, then print the in-sample error, the
# test error and the number of support vectors.
# NOTE(review): gamma and coef0 are left at scikit-learn's defaults; if the
# intended kernel is (1 + x.x')**Q, pass gamma=1 and coef0=1 -- confirm.
for digit in range(0, 10, 2):
    y = (df_train.digit == digit).map({True: 1, False: -1})
    yt = (df_test.digit == digit).map({True: 1, False: -1})
    poly_svc = svm.SVC(kernel='poly', degree=Q, C=C)
    poly_svc.fit(X, y)
    train_hits = poly_svc.predict(X) == y
    test_hits = poly_svc.predict(Xt) == yt
    ein = 1 - train_hits.mean()
    eout = 1 - test_hits.mean()
    sv = len(poly_svc.support_vectors_)
    print("%s vs all - ein %.4f - eout %.4f - sv %d" %(digit, ein, eout, sv))
In [98]:
# One-vs-all for the odd digits: label the chosen digit +1 and every other
# row -1, fit a degree-Q polynomial SVM, then print the in-sample error, the
# test error and the number of support vectors.
# NOTE(review): gamma and coef0 are left at scikit-learn's defaults; if the
# intended kernel is (1 + x.x')**Q, pass gamma=1 and coef0=1 -- confirm.
for digit in range(1, 10, 2):
    y = (df_train.digit == digit).map({True: 1, False: -1})
    yt = (df_test.digit == digit).map({True: 1, False: -1})
    poly_svc = svm.SVC(kernel='poly', degree=Q, C=C)
    poly_svc.fit(X, y)
    train_hits = poly_svc.predict(X) == y
    test_hits = poly_svc.predict(Xt) == yt
    ein = 1 - train_hits.mean()
    eout = 1 - test_hits.mean()
    sv = len(poly_svc.support_vectors_)
    print("%s vs all - ein %.4f - eout %.4f - sv %d" %(digit, ein, eout, sv))
In [153]:
# Reduce the task to two classes: rows whose label is `digit` become +1 and
# rows whose label is `digit2` become -1; all other rows are dropped.
digit = 1
digit2 = 5

# Training subset. `.loc` is used because `.ix` was removed in pandas 1.0.
idx = df_train.digit.isin([digit, digit2])
X = df_train.loc[idx, ['intensity', 'symmetry']]
y = df_train.loc[idx, 'digit'].apply(lambda x: 1 if x == digit else -1)

# Test subset, built the same way (idx is left holding the test mask).
idx = df_test.digit.isin([digit, digit2])
Xt = df_test.loc[idx, ['intensity', 'symmetry']]
yt = df_test.loc[idx, 'digit'].apply(lambda x: 1 if x == digit else -1)
In [154]:
# Rebuild the two-class test subset from the current `digit` / `digit2`:
# keep only rows with those labels, +1 for `digit` and -1 for `digit2`.
# `.loc` is used because `.ix` was removed in pandas 1.0.
idx = df_test.digit.isin([digit, digit2])
Xt = df_test.loc[idx, ['intensity', 'symmetry']]
yt = df_test.loc[idx, 'digit'].apply(lambda x: 1 if x == digit else -1)
In [151]:
# Sweep the penalty C for a degree-2 polynomial kernel on the current
# X / y and Xt / yt data, printing both error rates and the number of
# support vectors for each value.
Q = 2
for C in (0.0001, 0.001, 0.01, 0.1, 1):
    poly_svc = svm.SVC(kernel='poly', degree=Q, C=C)
    poly_svc.fit(X, y)
    train_hits = poly_svc.predict(X) == y
    test_hits = poly_svc.predict(Xt) == yt
    ein = 1 - train_hits.mean()
    eout = 1 - test_hits.mean()
    sv = len(poly_svc.support_vectors_)
    print("C %.4f - ein %.5f - eout %.5f - sv %d" %(C, ein, eout, sv))
In [152]:
# Sweep the penalty C for a degree-5 polynomial kernel on the current
# X / y and Xt / yt data, printing both error rates and the number of
# support vectors for each value.
Q = 5
for C in (0.0001, 0.001, 0.01, 0.1, 1):
    poly_svc = svm.SVC(kernel='poly', degree=Q, C=C)
    poly_svc.fit(X, y)
    train_hits = poly_svc.predict(X) == y
    test_hits = poly_svc.predict(Xt) == yt
    ein = 1 - train_hits.mean()
    eout = 1 - test_hits.mean()
    sv = len(poly_svc.support_vectors_)
    print("C %.4f - ein %.5f - eout %.5f - sv %d" %(C, ein, eout, sv))
In [ ]:
# Repeat of the degree-2 sweep over C: fit on X / y, score on X / y and
# Xt / yt, and print the errors and support-vector count per C.
Q = 2
for C in (0.0001, 0.001, 0.01, 0.1, 1):
    poly_svc = svm.SVC(kernel='poly', degree=Q, C=C)
    poly_svc.fit(X, y)
    train_hits = poly_svc.predict(X) == y
    test_hits = poly_svc.predict(Xt) == yt
    ein = 1 - train_hits.mean()
    eout = 1 - test_hits.mean()
    sv = len(poly_svc.support_vectors_)
    print("C %.4f - ein %.5f - eout %.5f - sv %d" %(C, ein, eout, sv))
In [223]:
# Two-class training subset: keep only rows labelled `digit` or `digit2`,
# mapping `digit` to +1 and `digit2` to -1.
digit = 1
digit2 = 5
idx = df_train.digit.isin([digit, digit2])
# `.loc` is used because `.ix` was removed in pandas 1.0.
X = df_train.loc[idx, ['intensity', 'symmetry']]
y = df_train.loc[idx, 'digit'].apply(lambda x: 1 if x == digit else -1)
In [231]:
import numpy as np
# Validation-based choice of C for a degree-2 polynomial kernel.
# On each of RUNS repetitions a random ~90% of the rows of X / y is used for
# fitting and the remaining rows for validation; the C with the lowest
# validation error is stored in C_min_list[i] and that error in
# e_val_min_list[i].
RUNS = 100
C_min_list = np.zeros([RUNS, ])
e_val_min_list = np.zeros([RUNS, ])
Q = 2
# Fixed seed so that Restart & Run All reproduces the same splits.
rng = np.random.RandomState(0)
for i in range(RUNS):
    # Start above any attainable error rate so the first candidate always
    # becomes the incumbent; C_min can then never be stale or undefined.
    e_val_min = np.inf
    C_min = np.nan
    msk = rng.rand(len(X)) < 0.9
    X_train, y_train = X[msk], y[msk]
    X_val, y_val = X[~msk], y[~msk]
    for C in (0.0001, 0.001, 0.01, 0.1, 1):
        poly_svc = svm.SVC(kernel='poly', degree=Q, C=C).fit(X_train, y_train)
        # Error on the held-out rows; this is the value the selection
        # below compares, so it must be bound to e_val.
        e_val = 1 - (poly_svc.predict(X_val) == y_val).mean()
        # Candidates are visited in ascending order and the test is strict,
        # so ties keep the smaller C.
        if e_val < e_val_min:
            e_val_min = e_val
            C_min = C
    e_val_min_list[i] = e_val_min
    C_min_list[i] = C_min
In [225]:
# Print each distinct value stored in C_min_list together with how many
# times it occurs, in ascending order of value.
distinct_values, occurrences = np.unique(C_min_list, return_counts=True)
for u, count in zip(distinct_values, occurrences):
    print("%f %d" % (u, count))